package com.pan.crawler.impl;

import com.pan.crawler.Page;
import com.pan.entity.Chapter;
import org.openqa.selenium.By;
import org.openqa.selenium.edge.EdgeDriver;

import java.time.Duration;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

/**
 * {@link Page} implementation that loads chapter pages in a Microsoft Edge
 * browser driven by Selenium.
 *
 * <p>Only supports crawling https://www.31xiaoshuo.com.
 */
public class PageImpl implements Page {

    /**
     * Total number of load attempts made per chapter before giving up.
     * The previous loop was bounded at 5 but threw on the 4th failure, so 4 is
     * the limit callers have actually been observing.
     */
    private static final int MAX_ATTEMPTS = 4;

    /** How long element lookups wait for the element to appear. */
    private static final Duration IMPLICIT_WAIT = Duration.ofSeconds(3);

    /** Advertisement banner embedded in the chapter text; removed verbatim. */
    private static final String AD_BANNER =
            "【稳定运行多年的小说app，媲美老版追书神器，老书虫都在用的换源App，huanyuanapp.org】";

    /** Trailing "add bookmark" link text at the very end of the content. */
    private static final Pattern TRAILING_BOOKMARK = Pattern.compile("加入书签$");

    private final EdgeDriver edgeDriver;

    /**
     * Launches an Edge browser, minimizes its window and configures the
     * implicit wait. If configuration fails, the browser is quit before the
     * failure is rethrown so that no browser process is left behind.
     */
    public PageImpl() {
        EdgeDriver driver = new EdgeDriver();
        try {
            driver.manage().window().minimize();
            driver.manage().timeouts().implicitlyWait(IMPLICIT_WAIT);
        } catch (RuntimeException e) {
            try {
                driver.quit();
            } catch (RuntimeException quitFailure) {
                // Keep the original failure as the primary error.
                e.addSuppressed(quitFailure);
            }
            throw e;
        }
        edgeDriver = driver;
    }

    /**
     * Strips known boilerplate from chapter text: every occurrence of the
     * advertisement banner, then the "加入书签" text at the end of the content.
     *
     * @param content raw chapter text; must not be {@code null}
     * @return the text with the boilerplate removed
     */
    public String clear(String content) {
        // The banner is plain text, so remove it literally rather than as a
        // regex (its '.' would otherwise match any character).
        String withoutAd = content.replace(AD_BANNER, "");
        return TRAILING_BOOKMARK.matcher(withoutAd).replaceAll("");
    }

    /**
     * Loads the chapter's page and returns its cleaned text, preceded by the
     * chapter title and surrounded by blank lines.
     *
     * @param chapter chapter whose URL is loaded and whose title is prepended
     * @return the formatted chapter text
     * @throws RuntimeException if the page content could not be read after
     *         {@value #MAX_ATTEMPTS} attempts; the last failure is the cause
     */
    @Override
    public String get(Chapter chapter) {
        String content = fetchContent(chapter.getUrl());
        String title = chapter.getTitle();
        return "\n\n\n\n\n\n\n" + title + "\n\n\n\n" + clear(content);
    }

    /** Quits the underlying browser driver. */
    @Override
    public void close() {
        edgeDriver.quit();
    }

    /**
     * Navigates to {@code url} and reads the text of the element with id
     * {@code content}, retrying on any failure up to {@link #MAX_ATTEMPTS} times.
     */
    private String fetchContent(String url) {
        Exception lastFailure = null;
        for (int attempt = 1; attempt <= MAX_ATTEMPTS; attempt++) {
            try {
                edgeDriver.navigate().to(url);
                return edgeDriver.findElement(By.id("content")).getText();
            } catch (Exception e) {
                // Remember the failure so it can be reported as the cause.
                lastFailure = e;
            }
        }
        throw new RuntimeException("请求页面失败", lastFailure);
    }
}
